library(data.table)
library(ggplot2)
library(knitr)
library(ggrepel)
library(RColorBrewer)
library(DESeq2)
#library(rnaseqGene)
library(plotly)
library(Rtsne)
library(scales)
library(dplyr)
rm(list = ls())
setDTthreads(8)
data.output.dir <- file.path(here::here(
'..','..',
's3-roybal-tcsl',
'lenti_screen_compiled_data','data'))
load(file=file.path(data.output.dir, 'pooled_analysis_data.Rdata'))
# set this to true to re-run, else will load from s3
rerun_deseq <- F
# arrayed list of CARs
array.list <- c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40')
We’d like to use DESeq2’s normalization for CAR-score and to properly normalize the baseline measurements.
This can be done with either VST or rlog.
https://www.biostars.org/p/188808/ https://support.bioconductor.org/p/126713/
cts <- dcast(
read.counts[, .(
CAR.align,
sort.group.bin,
k.type, t.type, batch, assay, donor,
sort.group,
bin,
counts)],
CAR.align ~ sort.group.bin, value.var='counts')
cts <- data.frame(cts[, -1], row.names = cts[, CAR.align])
cts[is.na(cts)] <- 0
vst_cts <- reshape2::melt(varianceStabilizingTransformation(as.matrix(cts)))
## converting counts to integer mode
names(vst_cts) <- c('CAR.align','sort.group.bin', 'vst')
rlog_cts <- reshape2::melt(rlog(as.matrix(cts)))
## rlog() may take a long time with 50 or more samples,
## vst() is a much faster transformation
## converting counts to integer mode
names(rlog_cts) <- c('CAR.align','sort.group.bin', 'rlog')
rlog_cts$CAR.align <- factor(rlog_cts$CAR.align, levels=1:40,
labels=levels(vst_cts$CAR.align))
norm_cts <- merge(vst_cts, rlog_cts, by=c('CAR.align','sort.group.bin'))
read.counts <- merge(read.counts, norm_cts, by=c('CAR.align','sort.group.bin'))
read.counts[,
vst_car_score := sum((vst - mean(vst)) * sqrt(ctv.bin.score)),
by=c('CAR.align','sort.group')]
read.counts[,
rlog_car_score := sum((rlog - mean(rlog)) * sqrt(ctv.bin.score)),
by=c('CAR.align','sort.group')]
ggplot(read.counts) +
geom_boxplot(
aes(x=reorder(CAR.align,vst_car_score), y=vst_car_score)) +
coord_flip() +
facet_wrap(t.type ~ k.type) +
facet_grid(t.type ~ assay + k.type)
ggplot(read.counts) +
geom_boxplot(
aes(x=reorder(CAR.align,rlog_car_score), y=rlog_car_score)) +
coord_flip() +
facet_wrap(t.type ~ k.type) +
facet_grid(t.type ~ assay + k.type)
ggplot(read.counts) +
geom_point(
aes(x=rlog_car_score, y=CAR.score, color=interaction(donor, batch))) +
coord_flip() +
facet_wrap(t.type ~ k.type) +
facet_grid(t.type ~ assay + k.type)
ggplot(read.counts) +
geom_point(
aes(x=rlog_car_score, y=vst_car_score, color=interaction(donor, batch))) +
coord_flip() +
facet_wrap(t.type ~ k.type) +
facet_grid(t.type ~ assay + k.type)
run_deseq <- function(data.dt, ref_bin, test_bin,
control_replicates = T,
interaction = T, group.control = F, weight.bins = F)
{
# identify inputs
assay_input <- as.character(unique(data.dt[, assay]))
k_type <- as.character(unique(data.dt[, k.type]))
t_type <- as.character(unique(data.dt[, t.type]))
## 1. Do bin normalization weights =======
# bin normalization weights
data.weights <- unique(data.dt[, list(
batch, donor, timepoint, assay, t.type, k.type,
sort.group, bin, bin.pct, bin.reads)])[,
list(bin, bin.reads, bin.pct, sort.group,
read.weight=bin.pct * bin.reads / sum(bin.pct * bin.reads)),
by=.(batch, donor, timepoint, assay, t.type, k.type)][,
read.weight.norm := read.weight/exp(mean(log(read.weight))),
by=.(batch, donor, timepoint, assay, t.type, k.type)]
stopifnot(nrow(interaction(assay_input, k_type, t_type)) == 1)
# prepare cts and coldata dataframes
## 2. Prepare Ref Bins ============
if(length(ref_bin) == 1 & ref_bin[1] == 'baseline') {
# reference is baseline
# get baseline counts per donor/assay replicate
ref.bin.dt <- dcast(
data.dt[
bin == 'D' & assay == assay_input &
k.type == k_type & t.type == t_type,
.(
CAR.align,
bin.sort.group = paste(
batch, donor, timepoint, assay, t.type, 'base', sep = '_'),
k.type, t.type, batch, assay, donor,
sort.group,
bin = 'base',
counts = baseline.counts)],
CAR.align ~ bin.sort.group, value.var='counts')
ref.weights <- rep(1, ncol(ref.bin.dt))
} else {
# reference is a specified bin
ref.bin.dt <- dcast(
data.dt[
bin %in% ref_bin & assay == assay_input &
k.type == k_type & t.type == t_type,
.(
CAR.align,
bin.sort.group = paste(sort.group, bin, sep = '_'),
k.type, t.type, batch, assay, donor,
sort.group,
bin,
counts)],
CAR.align ~ bin.sort.group, value.var='counts')
ref.weights <- dcast(data.weights[
bin %in% ref_bin & assay == assay_input &
k.type == k_type & t.type == t_type,
.(
bin.sort.group = paste(sort.group, bin, sep = '_'),
k.type, t.type, batch, assay, donor,
sort.group,
bin,
read.weight.norm)],
. ~ bin.sort.group,
value.var = 'read.weight.norm')
stopifnot(nrow(ref.bin.dt) == nrow(unique(ref.bin.dt)))
}
# copy the ref bin columns for each of the test bin columns
if (interaction == T) {
num.ref.reps <- ncol(ref.bin.dt) - 1
ref.bin.dt <- cbind(ref.bin.dt[, 1],
do.call("cbind", replicate(length(test_bin),
ref.bin.dt[, -1], simplify = FALSE)))
names(ref.bin.dt) <- c(names(ref.bin.dt[, 1]),
paste(names(ref.bin.dt[, -1]), rep(test_bin, each=num.ref.reps),
sep = '_'))
}
## 3. Prepare Test Bins ============
test.bin.dt <- dcast(
data.dt[
bin %in% test_bin & assay == assay_input &
k.type == k_type & t.type == t_type,
.(
CAR.align,
bin.sort.group = paste(sort.group, bin, sep = '_'),
k.type, t.type, batch, assay, donor,
sort.group,
bin,
counts)],
CAR.align ~ bin.sort.group, value.var='counts')
# check that replicate counts match
stopifnot(nrow(ref.bin.dt) == nrow(unique(ref.bin.dt)))
stopifnot(nrow(test.bin.dt) == nrow(unique(test.bin.dt)))
## 4. Merge and create design matrix ============
cts <- merge(ref.bin.dt, test.bin.dt, by = 'CAR.align')
cts <- data.frame(cts[, -1], row.names = cts[, CAR.align])
cts[is.na(cts)] <- 0
coldata <- data.frame(
condition = c(
rep('reference', ncol(ref.bin.dt) - 1),
rep('test', ncol(test.bin.dt) - 1)),
rep = data.table(t(sapply(strsplit(c(
names(ref.bin.dt)[-1],
names(test.bin.dt)[-1]),"_"), `[`, c(1,2))))[,
paste(V1, V2, sep='_')],
bin = sapply(strsplit(c(
names(ref.bin.dt)[-1],
names(test.bin.dt)[-1]),"_"), `[`, 7),
row.names = c(names(ref.bin.dt)[-1], names(test.bin.dt)[-1]))
dds <- DESeqDataSetFromMatrix(countData = cts,
colData = coldata,
design = ~ condition + rep)
# set reference
dds$condition <- relevel(dds$condition, ref = "reference")
print(coldata)
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# try various designs of decreasing complexity, since we don't have a
# full matrix for every example deseq run.
# try_designs <- function(designs, dds) tryCatch({
# message(paste("trying design:",as.character(designs[[1]]),"\n"))
# DESeq2::design(dds) <- designs[[1]]
# return(DESeq2::DESeq(dds)) }, error= function(e) {
# dds <- try_designs(designs[c(-1)], dds)
# message(e)
# message('getting here\n')
# return(dds)
# }
# )
#
# try_designs(
# designs=c(
# ~ condition + condition:rep + condition:bin,
# ~ condition + rep + bin,
# ~ condition + rep,
# ~ condition
# ),
# dds
# )
# if (control_replicates) {
# design(dds) <- ~ condition + rep
# }
#
# #check unique bins before using bins as contrast
# n_uniq_bins <- length(unique(coldata$bin))
#
# if (n_uniq_bins == 1 & interaction == T) {
# warning('Cannot use bin contrast, only one bin level.')
# }
#
# if (interaction == T & group.control == T & n_uniq_bins > 1) {
# design(dds) <- ~ condition + condition:rep + condition:bin
# } else if (interaction == T & n_uniq_bins > 1) {
# design(dds) <- ~ condition + rep + bin
# }
dds <- DESeq2::DESeq(dds)
res <- results(dds)
# shrink log fold change
resLFC <- lfcShrink(dds,
coef="condition_test_vs_reference", type="apeglm")
# convert to data.table
results.dt <- as.data.table(resLFC)[, CAR.align := row.names(resLFC)]
results.dt <- cbind(results.dt[, 6], results.dt[, -6])
results.dt[, assay := assay_input][, k.type := k_type][, t.type := t_type]
return(results.dt)
}
if (rerun_deseq) {
test_ref_sets <- c(ref=list(), test=list())
# A/B/AB vs C/D/CD
test_ref_sets$ref <- c(as.list(rep('D',3)), as.list(rep('C',3)), rep(list(c('C','D')),3))
test_ref_sets$test <- rep(list('A','B',c('A','B')),3)
test_ref_sets$interaction <- as.list(rep(F, 9))
# A/B/AB/ABCD vs baseline
test_ref_sets$ref <- c(test_ref_sets$ref, as.list(rep('baseline',3)))
test_ref_sets$test <- c(test_ref_sets$test, list('A','B',c('A','B','C','D')))
test_ref_sets$interaction <- c(test_ref_sets$interaction, as.list(rep(T, 3)))
all.deseq.results.dt <- data.table()
for (set_i in seq_along(test_ref_sets$ref)) {
ref_set <- test_ref_sets$ref[[set_i]]
test_set <- test_ref_sets$test[[set_i]]
ref_str <- paste0(ref_set, collapse='')
test_str <- paste0(test_set, collapse='')
inter <- test_ref_sets$interaction[[set_i]]
deseq.results.dt <- read.counts[batch != 'post-cytof' & !is.na(k.type),
{
message(paste(c(ref_str, test_str, inter, .BY[1],"\n"), collapse= ' - '));
tryCatch(
run_deseq(
data.dt = .SD,
ref_bin = ref_set,
test_bin = test_set,
interaction = inter,
group.control = T),
error= function(e) {message(e); return(data.table())}
)
},
by = .(group)]
if (nrow(deseq.results.dt) > 0) {
deseq.results.dt[,
`:=`(
ref_set = ref_str,
test_set = test_str,
inter = inter)]
}
all.deseq.results.dt <- rbind(
all.deseq.results.dt,
deseq.results.dt, fill=T)
}
save(list=c('all.deseq.results.dt'),
file=file.path(data.output.dir, 'pooled_deseq2_data.Rdata'))
}
if (!rerun_deseq) load(
file=file.path(data.output.dir, 'pooled_deseq2_data.Rdata'))
#add back CAR scores
cols_to_add <- c('CAR.score','sort.group','donor','batch')
cols_to_join <- c('group', 'CAR.align', 'assay', 'k.type', 't.type')
all.deseq.results.dt[, padj.disp := -log10(padj)]
all.deseq.results.dt[, lfc.disp := log2FoldChange]
all.deseq.results.dt[padj.disp > 10, padj.disp := Inf]
all.deseq.results.dt[abs(lfc.disp) > 5, lfc.disp := sign(lfc.disp) * Inf]
# mask receptor names except for known ones
control_domains <- c('4-1BB','CD28')
chosen_domains <- c('BAFF-R','CD40','TACI','TNR8')
neg_domain <- c('KLRG1')
all.deseq.results.dt[, CAR.type := 'other']
all.deseq.results.dt[CAR.align %in% control_domains, CAR.type := 'control']
all.deseq.results.dt[CAR.align %in% chosen_domains, CAR.type := 'chosen']
all.deseq.results.dt[CAR.align %in% neg_domain, CAR.type := 'neg']
all.deseq.results.dt[,
CAR.type := factor(CAR.type,levels=c('other','control','chosen','neg'))]
make_volcanoes <- function(data.dt) {
ggplot(data.dt, aes(
x=lfc.disp, y=padj.disp,
color=CAR.type,
label=CAR.align,
size=CAR.type)) +
geom_point() +
geom_hline(yintercept=-log10(0.05), linetype=2) +
facet_grid(test_set + ref_set ~ t.type + assay + k.type) +
scale_color_manual('',
labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
values=c('grey50', RColorBrewer::brewer.pal(5, 'Paired')[c(2,4,5)])) +
scale_size_manual('',
labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
values=c(1,3,3,3)) +
labs(x='Log2 FC', y='-log10(P-value)', title='Assay Volcano Plots')
}
make_timeseries <- function(data.dt) {
ggplot(data.dt, aes(
y=lfc.disp, x=assay,
color=CAR.type,
group=CAR.align,
label=CAR.align,
size=CAR.type)) +
geom_point() +
geom_line() +
facet_grid(t.type ~ test_set + ref_set) +
scale_color_manual('',
labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
values=c('grey50', RColorBrewer::brewer.pal(5, 'Paired')[c(2,4,5)])) +
scale_size_manual('',
labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
values=c(0.5,1,1,1)) +
labs(y='Log2 FC', x='Assay', title='Log fold change across assays')
}
make_cd4_cd8 <- function(data.dt) {
ggplot(
dcast(data.dt,
CAR.align + assay + k.type + ref_set + test_set + inter + CAR.type ~ t.type,
value.var = c("log2FoldChange", "padj.disp")),
aes(y=log2FoldChange_CD8, x=log2FoldChange_CD4,
color=CAR.type,
label=CAR.align,
size=CAR.type)) +
geom_point() +
facet_grid(test_set + ref_set ~ assay + k.type) +
scale_color_manual('',
labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
values=c('grey50', RColorBrewer::brewer.pal(5, 'Paired')[c(2,4,5)])) +
scale_size_manual('',
labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
values=c(1,3,3,3)) +
labs(x='CD4', y='CD8', title='Log fold change, CD4 vs CD8')
}
make_pos_neg <- function(data.dt) {
ggplot(
dcast(data.dt,
CAR.align + assay + t.type + ref_set + test_set + inter + CAR.type ~ k.type,
value.var = c("lfc.disp", "padj.disp")),
aes(y=lfc.disp_pos, x=lfc.disp_neg,
color=CAR.type,
label=CAR.align,
size=CAR.type)) +
geom_point() +
facet_grid(test_set + ref_set ~ t.type + assay) +
scale_color_manual('',
labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
values=c('grey50', RColorBrewer::brewer.pal(5, 'Paired')[c(2,4,5)])) +
scale_size_manual('',
labels=c('Other Receptors', 'CD28/4-1BB', 'New Receptors','Negative'),
values=c(1,3,3,3)) +
labs(x='CD19-', y='CD19+', title='Log fold change, CD19+ vs CD19-')
}
Comparisons on x/y, all combos
x_group <- 'prolif2_d1_3d_CTV1_CD4_neg'
y_group <- 'prolif2_d1_3d_CTV1_CD4_pos'
cast_comparison <- function(
comp.df, x_group, y_group, value_col='CAR.score', xycols=c('x','y'),
rescale= 'combined') {
#message(paste(x_group, y_group, sep=', '))
cast_groups <- dcast(
unique(comp.df[sort.group %in% c(x_group, y_group),
list(CAR.align, sort.group, get(value_col))]),
CAR.align ~ sort.group, value.var = 'V3')[,
`:=`(x.group= x_group, y.group= y_group)]
names(cast_groups)[c(2,3)] <- xycols
stopifnot(rescale %in% c('combined','separate'))
# rescle == combined:
# rescale both x and y to (0,1) on same scale
xy_scalemin = cast_groups[, min(c(x,y), na.rm=T)]
xy_scalemax = cast_groups[, max(c(x,y), na.rm=T)]
tryCatch({
cast_groups[, x_scaled_comb := scales::rescale(
x, from=c(min(c(x,y), na.rm=T), max(c(x,y), na.rm=T)))]
cast_groups[, y_scaled_comb := scales::rescale(
y, from=c(min(c(x,y), na.rm=T), max(c(x,y), na.rm=T)))]
}, error = function(e) {
message(e)
cast_groups[, x_scaled_comb := NaN]
cast_groups[, y_scaled_comb := NaN]}
)
# rescle == separate:
# rescale x and y to (0,1) on individual scales
tryCatch({
cast_groups[, x_scaled_sep := scales::rescale(x)]
cast_groups[, y_scaled_sep := scales::rescale(y)]
}, error = function(e) {
message(e)
cast_groups[, x_scaled_sep := NaN]
cast_groups[, y_scaled_sep := NaN]}
)
}
# use malanhanobis distance to collapse replicates,
# then use median absolute deviation to identify outliers
#https://www.r-craft.org/r-news/combined-outlier-detection-with-dplyr-and-ruler/
maha_dist <- . %>% select_if(is.numeric) %>%
mahalanobis(center = colMeans(.), cov = cov(.))
isnt_out_maha <- function(tbl, isnt_out_f, ...) {
tbl %>% maha_dist() %>% isnt_out_f(...)
}
isnt_out_mad <- function(x, thres = 3, na.rm = TRUE) {
abs(x - median(x, na.rm = na.rm)) <= thres * mad(x, na.rm = na.rm)
}
top_x_mad <- function(x, top=5, na.rm = TRUE) {
mads <- abs(x - median(x, na.rm = na.rm))
top_mads <- order(-mads)[1:top]
return(!(1:length(mads) %in% top_mads))
}
plot_all_reps <- function(df=read.counts, value_col) {
all_rep_comparisons <- df[
grepl('CTV', assay),
data.table(matrix(combn(unique(sort.group), 2), ncol=2, byrow=T)),
by=c('assay','t.type','k.type')]
names(all_rep_comparisons)[4:5] <- c('x.group','y.group')
all_rep_comparisons <- all_rep_comparisons[,
cast_comparison(df, x.group, y.group, value_col=value_col)[,
c('x.group','y.group') := NULL],
by=c('assay','t.type','k.type','x.group','y.group')]
# combine with baseline abundance as color
baseline.abund <- df[
assay=='baseline', list(mean.baseline= mean(car.abund.baseline, na.rm=T)),
by=c('t.type','CAR.align')][,
list(CAR.align,
rel.baseline.log= log10(mean.baseline/mean(mean.baseline))),
by=c('t.type')]
all_rep_comparisons <-all_rep_comparisons[
baseline.abund, on=c('t.type','CAR.align')]
all_rep_comparisons[, rep_pair := paste(
gsub('(prolif\\d_d\\d).*','\\1', x.group),
gsub('(prolif\\d_d\\d).*','\\1', y.group),
sep='\n')]
all_rep_comparisons[, assay_kt := paste(assay,t.type,k.type, sep='\n')]
# use malanhanobis distance to collapse replicates,
# then use median absolute deviation to identify outliers
all_rep_comparisons[,
outlier := {
nonsingular <- apply(tibble(x_scaled_sep, y_scaled_sep), 2,
function (x) var(x) > 0 & !is.na(var(x)))
non_singular_sd <- tibble(x_scaled_sep, y_scaled_sep)[,
(nonsingular)]
!isnt_out_maha(non_singular_sd, top_x_mad)
}, by=.(rep_pair, k.type, t.type, assay)]
all_rep_comparisons[, label_outliers := ''][outlier == T,
label_outliers := CAR.align]
all_rep_comparisons[, label_arrayed := ''][
CAR.align %in% array.list & !outlier,
label_arrayed := CAR.align]
non_na_comps <- all_rep_comparisons[,
!any(is.na(list(var(x_scaled_sep), var(y_scaled_sep)))),
by=.(rep_pair, k.type, t.type, assay)]
r_squareds <- all_rep_comparisons[
non_na_comps[V1==T], on=.(rep_pair, k.type, t.type, assay)][V1 == T][,
list(r=sqrt(summary(lm(x_scaled_sep ~ y_scaled_sep))$r.squared)),
by=.(rep_pair, k.type, t.type, assay)]
r_squareds[, r_lbl := paste('r=',as.character(round(r, 2)))]
return(ggplot(data=all_rep_comparisons,
aes(x_scaled_sep, y_scaled_sep)) +
geom_point(shape = 21, colour = "grey30", aes(fill=rel.baseline.log)) +
geom_text_repel(size=2.5, color='grey20', aes(label=label_outliers)) +
geom_text_repel(size=2.5, color='lightsalmon4', aes(label=label_arrayed)) +
theme_bw() +
scale_fill_distiller(palette='BrBG',
limit=c(-1,1) * max(abs(all_rep_comparisons$rel.baseline.log))) +
geom_label(size=2.5, aes(x=Inf, y=-Inf, label=r_lbl), data=r_squareds,
hjust=1, vjust=0) +
facet_grid(rep_pair ~ k.type + t.type + assay))
}
read.counts[, car.abund.log := log10(car.abund)]
#assay_rep_list:
assay_rep_set <- unique(read.counts[
data.table(
assay=c('baseline','CTV1','CTV2','CTV3'),
prev_assay=c(NA,'baseline','CTV1','CTV2')), on='assay'][,
list(assay, sort.group, prev_assay, donor, batch, t.type, k.type)])
#map day0
assay_rep_set <- rbind(
assay_rep_set[assay=='baseline'][, k.type := 'pos'],
assay_rep_set[assay=='baseline'][, k.type := 'neg'],
assay_rep_set[assay!='baseline'])
#merge with prev copy to get prev sort.group correspondence
assay_rep_set <- assay_rep_set[, list(
donor, batch, t.type, k.type,
prev_assay=assay,prev.sort.group=sort.group)][
assay_rep_set,
on=c('donor','batch','t.type','k.type','prev_assay')]
# use baseline from prolif2 always
assay_rep_set[assay == 'CTV1',
prev.sort.group := gsub('prolif1','prolif2',prev.sort.group)]
#merge with prev measurements
prev_measure <- unique(read.counts[, list(
car.abund.prev=car.abund, prev.sort.group=sort.group, CAR.align)])[
assay_rep_set, on=c('prev.sort.group')]
#merge with orig read counts
read.counts <- prev_measure[, list(
car.abund.prev, prev.sort.group, CAR.align, sort.group)][
read.counts, on=c('sort.group','CAR.align')]
#calculate relative prev
read.counts[, car.abund.rel.prev := car.abund/car.abund.prev]
plot_all_reps(value_col='CAR.score') +
labs(title='CAR score replicate comparison')
## Warning in summary.lm(lm(x_scaled_sep ~ y_scaled_sep)): essentially perfect fit:
## summary may be unreliable
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
plot_all_reps(value_col='car.abund') +
labs(title='CAR Abundance replicate comparison')
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
plot_all_reps(value_col='car.abund.log') +
labs(title='CAR Log Abundance replicate comparison')
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
plot_all_reps(value_col='car.abund.rel.baseline') +
labs(title='CAR Relative Abundance Change to Baseline, replicate comparison')
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
plot_all_reps(value_col='car.abund.rel.prev') +
labs(title='CAR Relative Abundance Change to Previous, replicate comparison')
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: Removed 81 rows containing missing values (geom_point).
## Warning: Removed 81 rows containing missing values (geom_text_repel).
## Warning: Removed 81 rows containing missing values (geom_text_repel).
plot_all_reps(value_col='vst_car_score') +
labs(title='VST-normalized CAR score replicate comparison')
## Warning in summary.lm(lm(x_scaled_sep ~ y_scaled_sep)): essentially perfect fit:
## summary may be unreliable
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
plot_all_reps(value_col='rlog_car_score') +
labs(title='rlog-normalized CAR score replicate comparison')
## Warning in summary.lm(lm(x_scaled_sep ~ y_scaled_sep)): essentially perfect fit:
## summary may be unreliable
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
comp.df <- copy(read.counts[assay != 'baseline' & batch != 'post-cytof'])
comp.df[, car.abund.rel.prev.scaled := scales::rescale(car.abund.rel.prev),
by=sort.group]
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
comp.df[, car.abund.rel.baseline.scaled := scales::rescale(car.abund.rel.baseline),
by=sort.group]
comp.df[, CAR.score.scaled := scales::rescale(CAR.score),
by=sort.group]
comp.df[, vst_car_score.scaled := scales::rescale(vst_car_score),
by=sort.group]
comp.df[, rlog_car_score.scaled := scales::rescale(rlog_car_score),
by=sort.group]
baseline.abund <- read.counts[
assay=='baseline', list(mean.baseline= mean(car.abund.baseline, na.rm=T)),
by=c('t.type','CAR.align')][,
list(CAR.align,
rel.baseline.log= log10(mean.baseline/mean(mean.baseline))),
by=c('t.type')]
comp.df <-comp.df[baseline.abund, on=c('t.type','CAR.align')]
plot_measure_pair <- function(df=comp.df, measure_i, measure_j) {
# fix overplotting:
df <- unique(df[!is.na(get(measure_i)) & !is.na(get(measure_j)), list(
get(measure_i), get(measure_j),
rel.baseline.log, CAR.align,
batch, donor, k.type, t.type, assay)])
names(df)[c(1,2)] <- c(measure_i, measure_j)
# use malanhanobis distance to collapse replicates,
# then use median absolute deviation to identify outliers
df[, outlier := {
#print(.BY)
#print(tibble(get(measure_i), get(measure_j)))
nonsingular <- apply(tibble(get(measure_i), get(measure_j)), 2,
function (x) var(x) > 0 & !is.na(var(x)))
non_singular_sd <- tibble(get(measure_i), get(measure_j))[,
(nonsingular)]
# if both measures are singular
if (!any(nonsingular)) (FALSE)
# for specific base of prev vs baseline and CTV1, will be singular
else if ((ncol(non_singular_sd) == 2 &&
all(non_singular_sd[,1] == non_singular_sd[,2])) |
ncol(non_singular_sd) != 2) {
!top_x_mad(pull(non_singular_sd, 1))
} else {
!isnt_out_maha(non_singular_sd, top_x_mad)
}
}, by=.(batch, donor, k.type, t.type, assay)]
df[, label_outliers := ''][outlier == T,
label_outliers := CAR.align]
df[, label_arrayed := ''][
CAR.align %in% array.list & !outlier,
label_arrayed := CAR.align]
r_squareds <- df[,
list(r=sqrt(summary(lm(data=.SD,
as.formula(paste(measure_i, "~", measure_j))))$r.squared)),
by=.(batch, donor, k.type, t.type, assay)]
r_squareds[, r_lbl := paste('r=',as.character(round(r, 2)))]
return(ggplot(df, aes_string(
x=measure_i, y=measure_j)) +
geom_point(data=df, shape = 21, colour = "grey30", aes(fill=rel.baseline.log)) +
geom_text_repel(data=df, size=2.5, color='grey20', aes(label=label_outliers)) +
geom_text_repel(data=df, size=2.5, color='lightsalmon4', aes(label=label_arrayed)) +
theme_bw() +
geom_label(size=2.5, aes(x=Inf, y=-Inf, label=r_lbl), data=r_squareds,
hjust=1, vjust=0) +
scale_fill_distiller(palette='BrBG',
limit=c(-1,1) * max(abs(df$rel.baseline.log))) +
facet_grid(batch+donor ~ k.type + t.type + assay) +
labs(title=paste(measure_i,'vs.',measure_j,'Individual replicates')))
}
deseq2_rc_measures <- comp.df[
dcast(all.deseq.results.dt[,
contrast := paste(ref_set, test_set, sep='_v_')][,
`:=`(log2FoldChange.scaled=scales::rescale(log2FoldChange)),
by=.(k.type, t.type, assay, contrast)],
k.type + t.type + assay + CAR.align ~ contrast,
value.var = c('log2FoldChange', 'padj')),
on=c('k.type','t.type','assay','CAR.align')]
plot_measure_pair(comp.df, 'vst_car_score.scaled', 'CAR.score.scaled')
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
plot_measure_pair(comp.df, 'rlog_car_score.scaled', 'CAR.score.scaled')
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
plot_measure_pair(comp.df, 'vst_car_score.scaled', 'rlog_car_score.scaled')
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
plot_measure_pair(comp.df, 'CAR.score.scaled', 'car.abund.rel.baseline.scaled')
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
plot_measure_pair(comp.df, 'vst_car_score.scaled', 'car.abund.rel.baseline.scaled')
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
plot_measure_pair(comp.df, 'rlog_car_score.scaled', 'car.abund.rel.baseline.scaled')
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
plot_measure_pair(comp.df, 'CAR.score.scaled', 'car.abund.rel.prev.scaled')
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
plot_measure_pair(comp.df, 'vst_car_score.scaled', 'car.abund.rel.prev.scaled')
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
plot_measure_pair(comp.df, 'rlog_car_score.scaled', 'car.abund.rel.prev.scaled')
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
plot_measure_pair(comp.df, 'car.abund.rel.baseline.scaled', 'car.abund.rel.prev.scaled')
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm(data = .SD, as.formula(paste(measure_i, "~",
## measure_j)))): essentially perfect fit: summary may be unreliable
plot_measure_pair(deseq2_rc_measures,
'log2FoldChange_D_v_A',
'car.abund.rel.prev.scaled') +
labs(title='DeSeq2 D:A vs Abundance-Prev')
plot_measure_pair(deseq2_rc_measures,
'log2FoldChange_D_v_A',
'car.abund.rel.prev.scaled') +
labs(title='DeSeq2 D:A vs Abundance-Prev')
plot_measure_pair(deseq2_rc_measures,
'log2FoldChange_D_v_A',
'car.abund.rel.baseline.scaled') +
labs(title='DeSeq2 D:A vs Abundance-Baseline')
plot_measure_pair(deseq2_rc_measures,
'log2FoldChange_CD_v_AB',
'car.abund.rel.prev.scaled') +
labs(title='DeSeq2 CD:AB vs Abundance-Prev')
plot_measure_pair(deseq2_rc_measures,
'log2FoldChange_CD_v_AB',
'car.abund.rel.baseline.scaled') +
labs(title='DeSeq2 CD:AB vs Abundance-Baseline')
bin_pcts <- dcast(
comp.df[,
list(bin, cell.count, car.bin.pct, sort.group,
assay_group= paste(.BY[c(2,3,4)], collapse='_')),
by=c('CAR.align','assay','t.type','k.type')],
CAR.align + assay + t.type + k.type + sort.group + assay_group ~ cell.count,
value.var='car.bin.pct')
cell_counts <- dcast(comp.df[,
list(bin, cell.count, car.bin.pct, sort.group, car.bin.read.pct,
assay_group= paste(.BY[c(2,3,4)], collapse='_')),
by=c('CAR.align','assay','t.type','k.type')][,
cell.bin.count := round(car.bin.read.pct * cell.count)],
CAR.align + assay + t.type + k.type + sort.group ~ bin,
value.var='cell.bin.count')
cell_totals <- dcast(
unique(comp.df[, list(bin, cell.count, sort.group)][,
bin := paste(bin,'total',sep='.')])[,
cell.pct := cell.count/sum(cell.count, na.rm=T), by=c('sort.group')],
sort.group ~ bin,
value.var='cell.pct')
cell_pcts <- dcast(
cell_counts[, data.table(bins=c('A','B','C','D'),
c(A,B,C,D)/sum(c(A,B,C,D), na.rm=T)), by=.(CAR.align, sort.group)],
CAR.align + sort.group ~ bins,
value.var='V2')
cell_counts[cell_pcts, on=c('CAR.align','sort.group')][cell_totals, on=c('sort.group')]
## CAR.align assay t.type k.type sort.group A B
## 1: 4-1BB CTV2 CD4 neg prolif1_d2_14d_CTV2_CD4_neg 1162 13189
## 2: BAFF-R CTV2 CD4 neg prolif1_d2_14d_CTV2_CD4_neg 302 2215
## 3: BCMA CTV2 CD4 neg prolif1_d2_14d_CTV2_CD4_neg 97 479
## 4: BTLA CTV2 CD4 neg prolif1_d2_14d_CTV2_CD4_neg 158 377
## 5: CD2 CTV2 CD4 neg prolif1_d2_14d_CTV2_CD4_neg 153 859
## ---
## 1235: Siglec-3 CTV1 CD8 pos prolif2_d2_4d_CTV1_CD8_pos 7796 22672
## 1236: TACI CTV1 CD8 pos prolif2_d2_4d_CTV1_CD8_pos 3888 26282
## 1237: TIGIT CTV1 CD8 pos prolif2_d2_4d_CTV1_CD8_pos 26489 71586
## 1238: TLT-1 CTV1 CD8 pos prolif2_d2_4d_CTV1_CD8_pos 1692 19200
## 1239: TNR8 CTV1 CD8 pos prolif2_d2_4d_CTV1_CD8_pos 122 3914
## C D i.A i.B i.C i.D A.total
## 1: 16667 50109 0.01432322 0.16257226 0.20544332 0.61766120 0.01734877
## 2: 2920 9735 0.01990509 0.14599262 0.19245979 0.64164250 0.01734877
## 3: 678 4617 0.01652189 0.08158746 0.11548288 0.78640777 0.01734877
## 4: 248 7740 0.01853807 0.04423325 0.02909774 0.90813094 0.01734877
## 5: 1254 5460 0.01980326 0.11118302 0.16230909 0.70670463 0.01734877
## ---
## 1235: 25996 5891 0.12502606 0.36359554 0.41690322 0.09447518 0.28161308
## 1236: 12630 1560 0.08764653 0.59247069 0.28471596 0.03516682 0.28161308
## 1237: 64814 13733 0.14997565 0.40530625 0.36696448 0.07775362 0.28161308
## 1238: 11455 1249 0.05036314 0.57149661 0.34096321 0.03717704 0.28161308
## 1239: 3152 748 0.01537298 0.49319556 0.39717742 0.09425403 0.28161308
## B.total C.total D.total
## 1: 0.1128972 0.1620473 0.7077067
## 2: 0.1128972 0.1620473 0.7077067
## 3: 0.1128972 0.1620473 0.7077067
## 4: 0.1128972 0.1620473 0.7077067
## 5: 0.1128972 0.1620473 0.7077067
## ---
## 1235: 0.3202824 0.2850362 0.1130683
## 1236: 0.3202824 0.2850362 0.1130683
## 1237: 0.3202824 0.2850362 0.1130683
## 1238: 0.3202824 0.2850362 0.1130683
## 1239: 0.3202824 0.2850362 0.1130683
# cell_counts[cell_pcts, on=c('CAR.align','sort.group')][
# cell_totals, on=c('sort.group')][
# sort.group == 'prolif2_d2_4d_CTV1_CD8_pos' & CAR.align == 'TNR8', list(
# theta_hat= ddirichlet(c(i.A, i.B, i.C, i.D), c(A, B, C, D)),
# theta=ddirichlet(c(A.total, B.total, C.total, D.total), c(A, B, C, D))),
# by=c('CAR.align','sort.group')]
all_rep_comparisons <- read.counts[
grepl('CTV', assay),
data.table(matrix(combn(unique(sort.group), 2), ncol=2, byrow=T)),
by=c('assay','t.type')]
names(all_rep_comparisons)[3:4] <- c('x.group','y.group')
all_rep_comparisons <- all_rep_comparisons[,
cast_comparison(read.counts, x.group, y.group)[,
c('x.group','y.group') := NULL],
by=c('assay','t.type','x.group','y.group')]
ggplot(all_rep_comparisons[assay == 'CTV3']) +
geom_point(aes(x_scaled_sep, y_scaled_sep)) +
facet_wrap(assay + t.type ~ x.group + y.group)
## Warning: Removed 3 rows containing missing values (geom_point).
all_rep_comparisons <- read.counts[
grepl('CTV', assay),
data.table(matrix(combn(unique(sort.group), 2), ncol=2, byrow=T)),
by=c('assay','k.type')]
names(all_rep_comparisons)[3:4] <- c('x.group','y.group')
all_rep_comparisons <- all_rep_comparisons[,
cast_comparison(read.counts, x.group, y.group)[,
c('x.group','y.group') := NULL],
by=c('assay','x.group','y.group')]
ggplot(all_rep_comparisons[assay == 'CTV3']) +
geom_point(aes(x_scaled_sep, y_scaled_sep)) +
facet_wrap(assay ~ x.group + y.group)
## Warning: Removed 4 rows containing missing values (geom_point).
# remove prolif1.d2.ctv3 for now - CD4 has only 1 bin, D,
# and KLRG1 is on top for CD19+ ...need to check this more
pos_neg_comparisons <- read.counts[grepl('CTV', assay), {
if (length(unique(sort.group)) == 2)
cast_comparison(
.SD,
unique(sort.group[k.type == 'neg']),
unique(sort.group[k.type == 'pos']))
},
by=c('donor','assay','batch','t.type')][
!(donor == 'd2' & batch == 'prolif1' & assay == 'CTV3')]
ggplot(pos_neg_comparisons[, prolif_donor := interaction(batch, donor)],
aes(x=x_scaled_comb, y=y_scaled_comb, label=CAR.align)) +
geom_point() +
facet_grid(prolif_donor+t.type~assay) +
geom_text_repel(data=pos_neg_comparisons[,
prolif_donor := interaction(batch, donor)][
CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40')]) +
labs(x='CD19- Proliferation (+/- scaled together, per replicate)',
y='CD19+ Proliferation (+/- scaled together, per replicate)')
ggplot(pos_neg_comparisons[, prolif_donor := interaction(batch, donor)],
aes(x=x_scaled_comb/y_scaled_comb, y=y_scaled_sep, label=CAR.align)) +
geom_point() +
facet_grid(prolif_donor+t.type~assay) +
geom_text_repel(data=pos_neg_comparisons[,
prolif_donor := interaction(batch, donor)][
CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40')]) +
labs(x='CD19+ Proliferation (+ scaled alone, per replicate)',
y='CD19- : CD19+ Proliferation Ratio')
pos_neg_comparisons_melt <- melt(
pos_neg_comparisons[,
xy_ratio := x_scaled_comb/y_scaled_comb], measure.vars= grep(
'^[xy][^.]', names(pos_neg_comparisons), perl=T, value=T)
)[,
`:=`(var_mean= mean(value, na.rm=T), var_sd= sd(value, na.rm=T)),
by=c('t.type','assay','CAR.align','variable')]
cast_left <- paste(names(pos_neg_comparisons_melt)[1:6], collapse = ' + ')
cast_right <- 'variable'
cast_formula <- paste(cast_left, cast_right, sep= ' ~ ')
pos_neg_assay_avg <- dcast(
pos_neg_comparisons_melt, cast_formula, value.var='var_mean')
pos_neg_assay_plot <- unique(
pos_neg_assay_avg[, list(t.type, assay, xy_ratio, y_scaled_sep, CAR.align)])
pos_neg_assay_plot[, label.few := '']
pos_neg_assay_plot[
CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40'),
label.few := CAR.align]
ggplot(pos_neg_assay_plot,
aes(x=xy_ratio, y=y_scaled_sep, label=label.few)) +
geom_point() +
facet_wrap(t.type ~ assay) +
geom_text_repel() +
labs(
title='Average Prolifertation and -/+ ratio across replicates',
y='CD19+ Proliferation (scaled for replicate)',
x='CD19- : CD19+ Proliferation Ratio')
ggplotly(ggplot(pos_neg_assay_plot,
aes(x=xy_ratio, y=y_scaled_sep, label=CAR.align)) +
geom_point() +
facet_wrap(t.type ~ assay) +
labs(
title='Average Prolifertation and -/+ ratio across replicates (interactive)',
y='CD19+ Proliferation (scaled for replicate)',
x='CD19- : CD19+ Proliferation Ratio'))
ggplot_pos_neg_assay <- ggplot(pos_neg_assay_plot,
aes(x=xy_ratio, y=y_scaled_sep, label=label.few)) +
geom_point() +
facet_grid(t.type ~ assay) +
scale_x_continuous(labels=percent) +
geom_text_repel(size=2.5) +
geom_vline(xintercept = -Inf) + geom_hline(yintercept = -Inf) +
theme_minimal() +
labs(
title='Average Prolifertation and -/+ ratio across replicates',
y='CD19+ Proliferation (Scaled)',
x='Relative Nonspecific Proliferation')
ggplot_pos_neg_assay
ggsave(here::here('..','figs','pooled','relative_prolif.pdf'),
ggplot_pos_neg_assay, height=4, width=7, useDingbats=FALSE)
pos_neg_comparisons_melt <- melt(
pos_neg_comparisons[,
xy_ratio := x_scaled_comb/y_scaled_comb], measure.vars= grep(
'^[xy][^.]', names(pos_neg_comparisons), perl=T, value=T)
)[,
`:=`(var_mean= mean(value, na.rm=T), var_sd= sd(value, na.rm=T)),
by=c('t.type','CAR.align','variable')]
cast_left <- paste(names(pos_neg_comparisons_melt)[1:6], collapse = ' + ')
cast_right <- 'variable'
cast_formula <- paste(cast_left, cast_right, sep= ' ~ ')
pos_neg_assay_avg <- dcast(
pos_neg_comparisons_melt, cast_formula, value.var='var_mean')
pos_neg_assay_plot <- unique(
pos_neg_assay_avg[, list(t.type, xy_ratio, y_scaled_sep, CAR.align)])
pos_neg_assay_plot[, label.few := '']
pos_neg_assay_plot[
CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40'),
label.few := CAR.align]
ggplot_pos_neg_assay <- ggplot(pos_neg_assay_plot,
aes(x=xy_ratio, y=y_scaled_sep, label=label.few)) +
geom_point() +
facet_grid(t.type ~ .) +
scale_x_continuous(labels=percent) +
geom_text_repel(size=2.5) +
geom_vline(xintercept = -Inf) + geom_hline(yintercept = -Inf) +
theme_minimal() +
labs(
title='Average Prolifertation and -/+ ratio across replicates',
y='CD19+ Proliferation (Scaled)',
x='Relative Nonspecific Proliferation')
##TODO Change labelled CARs to ones significant in the CD19+ panel
ggsave(here::here('..','figs','pooled','relative_prolif_t_mean.pdf'),
ggplot_pos_neg_assay, height=4, width=7, useDingbats=FALSE)
pos_neg_comparisons_melt <- melt(
pos_neg_comparisons[,
xy_ratio := x_scaled_comb/y_scaled_comb], measure.vars= grep(
'^[xy][^.]', names(pos_neg_comparisons), perl=T, value=T)
)[,
`:=`(var_mean= mean(value, na.rm=T), var_sd= sd(value, na.rm=T)),
by=c('t.type','CAR.align','variable')]
cast_left <- paste(names(pos_neg_comparisons_melt)[1:6], collapse = ' + ')
cast_right <- 'variable'
cast_formula <- paste(cast_left, cast_right, sep= ' ~ ')
pos_neg_assay_avg <- dcast(
pos_neg_comparisons_melt, cast_formula, value.var='var_mean')
pos_neg_assay_plot <- unique(
pos_neg_assay_avg[, list(t.type, x_scaled_comb, y_scaled_comb, CAR.align)])
pos_neg_assay_plot[, label.few := '']
pos_neg_assay_plot[
CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40'),
label.few := CAR.align]
# simpler plot
ggplot_pos_neg_assay <- ggplot(pos_neg_assay_plot,
aes(x=x_scaled_comb, y=y_scaled_comb, label=label.few)) +
geom_point() +
facet_grid(t.type ~ .) +
scale_x_continuous(expand=expansion(mult=1, add=0), limits=c(0, NA)) +
expand_limits(x=0) +
geom_text_repel(size=2.5) +
geom_vline(xintercept = -Inf) + geom_hline(yintercept = -Inf) +
theme_minimal() +
labs(
title='Proliferation, +/- antigen',
y='CD19+ Proliferation (scaled per replicate)',
x='CD19- Proliferation (scaled per replicate)')
# with axis cuts
ggplot_pos_neg_assay <- ggplot(pos_neg_assay_plot,
aes(x=x_scaled_comb, y=y_scaled_comb, label=label.few)) +
geom_point() +
facet_grid(t.type ~ .) +
scale_x_continuous(expand=expansion(mult=c(0,.1), add=0), limits=c(0,NA)) +
scale_y_continuous(limits=c(0.48, 0.95), expand=expansion(mult=c(0,0), add=0),
breaks=(5:10)/10, labels=c('', (6:10)/10)) +
geom_text_repel(size=2.5) +
geom_vline(xintercept = -Inf) + geom_hline(yintercept = -Inf) +
labs(
title='Proliferation, +/- antigen',
y='CD19+ Proliferation (scaled per replicate)',
x='CD19- Proliferation\n(scaled per replicate)') +
theme_minimal() +
theme(panel.spacing.y=unit(1.5, "lines"))
# fig2 style
# with axis cuts
ggplot_pos_neg_assay <- ggplot(pos_neg_assay_plot,
aes(x=x_scaled_comb, y=y_scaled_comb, label=label.few)) +
geom_point() +
facet_grid(t.type ~ .) +
scale_x_continuous(expand=expansion(mult=c(0,.1), add=0), limits=c(0,NA)) +
scale_y_continuous(limits=c(0.48, 0.95), expand=expansion(mult=c(0,0), add=0),
breaks=(5:10)/10, labels=c('', (6:10)/10)) +
geom_text_repel(size=2.5) +
geom_vline(xintercept = -Inf) + geom_hline(yintercept = -Inf) +
labs(
title='Proliferation, +/- antigen',
y='CD19+ Proliferation (scaled per replicate)',
x='CD19- Proliferation\n(scaled per replicate)') +
theme_minimal() +
theme(panel.spacing.y=unit(1.5, "lines"))
ggsave(here::here('..','figs','pooled','scaled_prolif_t_mean.pdf'),
ggplot_pos_neg_assay, height=6, width=3.6, useDingbats=FALSE)
Dan likes this one the best and will use for Fig 2B (until someone complains again).
pos_neg_comparisons_melt <- melt(
pos_neg_comparisons[,
xy_ratio := x_scaled_comb/y_scaled_comb], measure.vars= grep(
'^[xy][^.]', names(pos_neg_comparisons), perl=T, value=T)
)[,
`:=`(var_mean= mean(value, na.rm=T), var_sd= sd(value, na.rm=T)),
by=c('t.type','CAR.align','variable', 'assay')]
cast_left <- paste(names(pos_neg_comparisons_melt)[1:6], collapse = ' + ')
cast_right <- 'variable'
cast_formula <- paste(cast_left, cast_right, sep= ' ~ ')
pos_neg_assay_avg <- dcast(
pos_neg_comparisons_melt, cast_formula, value.var='var_mean')
pos_neg_assay_plot <- unique(
pos_neg_assay_avg[, list(t.type, x_scaled_comb, y_scaled_comb, CAR.align, assay)])
pos_neg_assay_plot[, label.few := '']
pos_neg_assay_plot[
CAR.align %in% c('BAFF-R','TNR8','4-1BB','TACI','CD28','KLRG1','CD40'),
label.few := CAR.align]
ggplot_pos_neg_assay <- ggplot(pos_neg_assay_plot[assay == 'CTV1'],
aes(x=x_scaled_comb, y=y_scaled_comb, label=label.few)) +
geom_point() +
facet_grid(t.type ~ .) +
scale_x_continuous() +
geom_text_repel(size=2.5) +
geom_vline(xintercept = -Inf) + geom_hline(yintercept = -Inf) +
theme_minimal() +
labs(
title='Average Prolifertation and -/+ ratio across replicates',
y='CTV1 CD19+ Proliferation (Scaled)',
x='CTV1 CD19- Proliferation (Scaled)')
ggsave(here::here('..','figs','pooled','scaled_prolif_t_mean_ctv1.pdf'),
ggplot_pos_neg_assay, height=4, width=7, useDingbats=FALSE)
ggplotly(make_volcanoes(all.deseq.results.dt[
k.type == 'pos' & ref_set == 'baseline']),
tooltip = "label", session='knitr')
ggplotly(make_cd4_cd8(all.deseq.results.dt[
k.type == 'pos' & ref_set == 'baseline']),
tooltip = "label", session='knitr')
ggplotly(make_timeseries(all.deseq.results.dt[
k.type == 'pos' & ref_set == 'baseline']),
tooltip = "label", session='knitr')
ggplotly(make_pos_neg(all.deseq.results.dt[
ref_set == 'baseline']),
tooltip = "label", session='knitr')
ggplotly(make_volcanoes(all.deseq.results.dt[
k.type == 'pos' & ref_set != 'baseline']),
tooltip = "label", session='knitr')
ggplotly(make_cd4_cd8(all.deseq.results.dt[
k.type == 'pos' & ref_set != 'baseline']),
tooltip = "label", session='knitr')
ggplotly(make_timeseries(all.deseq.results.dt[
k.type == 'pos' & ref_set != 'baseline']),
tooltip = "label", session='knitr')
ggplotly(make_pos_neg(all.deseq.results.dt[
ref_set == 'baseline']),
tooltip = "label", session='knitr')
ggplotly(make_pos_neg(all.deseq.results.dt[
ref_set != 'baseline']),
tooltip = "label", session='knitr')
data.output.dir <- file.path(here::here(
'..','..',
's3-roybal-tcsl',
'lenti_screen_compiled_data','data'))
load(
file=file.path(data.output.dir, 'pooled_deseq2_data.Rdata'))
all.deseq.results.dt[, padj.disp := -log10(padj)]
all.deseq.results.dt[, lfc.disp := log2FoldChange]
all.deseq.results.dt[padj.disp > 10, padj.disp := Inf]
all.deseq.results.dt[abs(lfc.disp) > 5, lfc.disp := sign(lfc.disp) * Inf]
data.dt <- all.deseq.results.dt
data.dt[, CAR.type := 'other']
data.dt[CAR.align == '4-1BB', CAR.type := '4-1BB']
data.dt[CAR.align == 'CD28', CAR.type := 'CD28']
data.dt[, CAR.type := factor(
CAR.type,levels=c('other','4-1BB','CD28'))]
#CD28 and 4-1BB Colors
receptor_cols <- RColorBrewer::brewer.pal(9, 'YlGnBu')[c(6,8)]
# label significant hits
data.dt[, CAR.type.size := CAR.type]
data.dt[CAR.type == 'other' &
((padj < 0.05 & log2FoldChange > 0) |
(padj < 0.05 & log2FoldChange > 0)),
CAR.type.size := 'other_sig']
data.dt[, CAR.label.sig := '']
data.dt[CAR.type.size != 'other',
CAR.label.sig := CAR.align]
# plot subset - CTV1, CTV2, and cumulative of all 3
data.subset.dt <- data.dt[
k.type == 'pos' & (
(ref_set == 'baseline' & test_set == 'ABCD' & assay == 'CTV3') |
(ref_set == 'D' & test_set == 'A' & assay != 'CTV3'))]
# rename facets
data.subset.dt[, assay := factor(assay,
labels=c('Initial Proliferation\n(d0-d3/d4)',
'Proliferation after 1 week\n(d8-d16)',
'Cumulative Proliferation\n(d0-d24)'))]
# max out at log10-15
data.subset.dt[, padj.disp := -log10(padj)]
data.subset.dt[padj.disp > 15, padj.disp := Inf]
interbin_volcano <- ggplot(data.subset.dt,
aes(
x=log2FoldChange, y=padj.disp,
color=CAR.type,
label=CAR.label.sig,
size=CAR.type.size)) +
geom_point() +
facet_grid(t.type ~ assay) +
scale_color_manual('',
labels=c('New Receptors', '4-1BB', 'CD28'),
values=c('grey50', receptor_cols),
guide=F) +
scale_size_manual('',
labels=c('New Receptors', '4-1BB', 'CD28', 'New Receptors'),
values=c(0.8,3,3,2.5), guide=F) +
geom_hline(yintercept=-log10(0.05), linetype = 'dashed') +
labs(x='Relative Proliferation,\nLog2 Fold Change from Mean',
y='-log10(P-value)') +
scale_y_continuous(limits=c(0, 15)) +
geom_text_repel() +
theme_minimal() +
theme(panel.border=element_rect(fill=NA))
interbin_volcano
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_text_repel).
ggsave(here::here('..','figs','pooled','interbin_volcano.pdf'),
interbin_volcano, height=4, width=7, useDingbats=FALSE)
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_text_repel).
cast_4v8 <- dcast(data.dt[
k.type == 'pos' &
((ref_set == 'baseline' & test_set == 'ABCD' & assay != 'CTV1') |
(ref_set == 'D' & test_set == 'A' & assay == 'CTV1'))],
CAR.align + assay + CAR.type ~ t.type,
value.var = c("log2FoldChange", "padj.disp"))
# rename facets
cast_4v8[, assay := factor(assay,
labels=c('Initial Proliferation\n(d0-d3/d4)',
'Cumulative Proliferation\n(d0-d16)',
'Cumulative Proliferation,\n(d0-d24)'))]
# label significant hits
cast_4v8[, CAR.type.size := CAR.type]
cast_4v8[CAR.type == 'other' &
((padj.disp_CD4 > -log10(0.05) & log2FoldChange_CD4 > 0) |
(padj.disp_CD8 > -log10(0.05) & log2FoldChange_CD8 > 0)),
CAR.type.size := 'other_sig']
cast_4v8[, CAR.label.sig := '']
cast_4v8[CAR.type.size != 'other',
CAR.label.sig := CAR.align]
prolif_4v8 <- ggplot(cast_4v8,
aes(y=log2FoldChange_CD8, x=log2FoldChange_CD4,
color=CAR.type,
label=CAR.label.sig,
size=CAR.type.size)) +
geom_point() +
facet_grid(. ~ assay) +
scale_color_manual('',
labels=c('New Receptors', '4-1BB', 'CD28'),
values=c('grey50', receptor_cols)) +
scale_size_manual('',
labels=c('New Receptors', '4-1BB', 'CD28', 'New Receptors'),
values=c(0.8,3,3,2.5)) +
geom_abline(linetype='dashed') +
geom_text_repel() +
theme_minimal() +
labs(x='CD4, relative log2 fold change',
y='CD8, relative log2 fold change') +
theme(panel.border=element_rect(fill=NA)) +
guides(size=F)
ggsave(here::here('..','figs','pooled','mixed_4v8.pdf'), prolif_4v8,
height=2.5, width=7)
cast_posneg <- dcast(data.dt[
((ref_set == 'baseline' & test_set == 'ABCD' & assay != 'CTV1') |
(ref_set == 'D' & test_set == 'A' & assay == 'CTV1'))],
CAR.align + assay + CAR.type + t.type ~ k.type,
value.var = c("log2FoldChange", "padj.disp"))
# rename facets
cast_posneg[, assay := factor(assay,
labels=c('Initial Proliferation\n(d0-d3/d4)',
'Cumulative Proliferation\n(d0-d16)',
'Cumulative Proliferation,\n(d0-d24)'))]
# label significant hits
cast_posneg[, CAR.type.size := CAR.type]
cast_posneg[CAR.type == 'other' & (
(2^(-padj.disp_pos) < 0.05 & log2FoldChange_pos > 0) |
(2^(-padj.disp_neg) < 0.05 & log2FoldChange_neg > 0)
),
CAR.type.size := 'other_sig']
prolif_posneg <- ggplot(cast_posneg,
aes(y=log2FoldChange_pos, x=log2FoldChange_neg,
color=CAR.type,
label=CAR.align,
size=CAR.type.size)) +
geom_point() +
facet_grid(t.type ~ assay) +
scale_color_manual('',
labels=c('New Receptors', '4-1BB', 'CD28'),
values=c('grey50', receptor_cols)) +
scale_size_manual('',
labels=c('New Receptors', '4-1BB', 'CD28', 'New Receptors'),
values=c(0.8,3,3,2)) +
geom_abline(linetype='dashed') +
theme_minimal() +
labs(y='Relative log2 fold change, with antigen',
x='Relative log2 fold change, no antigen') +
theme(panel.border=element_rect(fill=NA)) +
guides(size=F)
ggsave(here::here('..','figs','pooled','mixed_posneg.pdf'), prolif_posneg,
height=4.5, width=7)